home *** CD-ROM | disk | FTP | other *** search
-
-
- The PC Assembler Tutor - Copyright (C) 1990 Chuck Nelson
- ______________________
-
-
- MISHMASH
-
-
- This document contains several assembler programs. It has no page breaks and
- no footnotes so you can cut the programs directly out of the text with a word
- processor.
-
-
- BLOCK MOVE
-
- The first subroutine does a block move from one place in memory to another. It
- is designed so the source block and the target block can be overlapping. It
- first calculates the total address of the source block and the target block. If
- the source block is below the target block the move starts at the top of the
- source block and moves down. If the source block is above the target block the
- move starts at the bottom of the source block and moves up. This makes sure
- that overlapping data will not be clobbered.
-
- This calculates the full 20 bit address. It was designed for BASIC; BASIC
- sometimes requires the full 20 bit address. For many languages, all you need
- to do is look at the offset addresses since segments cannot overlap. This is
- NOT true of something called the "HUGE" mode, where you need to calculate the
- full 20 bit address.
-
-
- +++++++++++++++++++++ << START OF PROGRAM >> ++++++++++++++++++++++
-
- include /pushregs.mac
- _TEXT SEGMENT PUBLIC 'CODE'
- ASSUME cs:_TEXT
- PUBLIC BlockMove
- ; - - - - - - - - - -
- ; BlockMove ( from.seg, from.off, to.seg, to.off, byte.count)
- ; for BASIC
- ; MOVSW is from DS:[SI] to ES:[DI]
-
- ; Stack frame after "push bp / mov bp, sp" (far call, five word arguments).
- ; Each argument slot holds the near address of a word containing the
- ; actual value, so every argument is dereferenced through SI before use.
- FROM_SEG_ADDRESS    EQU [bp+14]
- FROM_OFFSET_ADDRESS EQU [bp+12]
- TO_SEG_ADDRESS      EQU [bp+10]
- TO_OFFSET_ADDRESS   EQU [bp+8]
- BYTE_COUNT_ADDRESS  EQU [bp+6]
- ; - - - - - - - - - -
- ; BlockMove - copy byte.count bytes from from.seg:from.off to
- ;             to.seg:to.off; the two blocks may overlap.
- ;
- ; The full 20 bit address (segment * 16 + offset) of both blocks is
- ; computed and compared:
- ;     FROM above TO  -> copy upwards, starting at the first byte
- ;     FROM below TO  -> copy downwards, starting at the last byte
- ;     FROM equals TO -> nothing to do
- ; The copy is done a word at a time, with one extra byte move when the
- ; count is odd.
- ;
- ; All registers used and the flags (including the direction flag, which
- ; the downward copy sets) are restored before returning.  The routine
- ; removes its 10 bytes of arguments on return.
- ; - - - - - - - - - -
- BlockMove proc far
-         push    bp
-         mov     bp, sp
-         pushf                           ; save flags: STD below must not leak out
-         PUSHREGS ax, bx, cx, dx, si, di, es, ds
-
-         ; AX:BX is the total FROM address
-         ; DX:DI is the total TO address
-         ; (FROM address > TO address) -> upwards
-         ; (FROM address < TO address) -> downwards
-
-         ; calculate 20 bit total address
-         sub     ax, ax                  ; zero AX
-         mov     si, FROM_SEG_ADDRESS
-         mov     bx, [si]                ; from_seg to BX
-         sub     dx, dx                  ; zero DX
-         mov     si, TO_SEG_ADDRESS
-         mov     di, [si]                ; to_seg to DI
-
-         mov     cx, 4                   ; shift left 4 bits (segment * 16)
- shift_loop:
-         shl     bx, 1
-         rcl     ax, 1                   ; carry from BX -> AX
-         shl     di, 1
-         rcl     dx, 1                   ; carry from DI -> DX
-         loop    shift_loop
-
-         ; AX:BX and DX:DI now contain the total address of the
-         ; segment start. Now add the offsets.
-         mov     si, FROM_OFFSET_ADDRESS
-         add     bx, [si]
-         adc     ax, 0
-         mov     si, TO_OFFSET_ADDRESS
-         add     di, [si]
-         adc     dx, 0
-
-         ; AX:BX and DX:DI are now the total addresses of the first
-         ; byte to be moved. Compare the high words first; only if they
-         ; are equal do the low words decide. If both are equal the
-         ; block is being moved onto itself, so there is no work to do.
-
-         cmp     ax, dx
-         ja      bottom_to_top           ; FROM is higher
-         jb      top_to_bottom           ; TO is higher
-         cmp     bx, di
-         ja      bottom_to_top           ; FROM is higher
-         jb      top_to_bottom           ; TO is higher
-         jmp     exit
-
-         ; ----- FROM is above TO: copy with ascending addresses -----
- bottom_to_top:
-         mov     si, TO_SEG_ADDRESS
-         mov     es, [si]                ; to_seg to ES
-         mov     si, TO_OFFSET_ADDRESS
-         mov     di, [si]                ; to_offset to DI
-         mov     si, BYTE_COUNT_ADDRESS
-         mov     cx, [si]                ; byte count to CX
-         mov     si, FROM_SEG_ADDRESS
-         mov     ax, [si]                ; temporary storage for new DS
-         mov     si, FROM_OFFSET_ADDRESS
-         mov     si, [si]                ; from_offset to SI
-         mov     ds, ax                  ; DS changes last: the arguments
-                                         ; above were read through the old DS
-         sub     bx, bx                  ; clear BX
-         shr     cx, 1                   ; word count; odd-byte flag in CF
-         rcl     bx, 1                   ; BX = 1 if one byte is left over
-         cld                             ; clear DF (go up)
-         rep     movsw                   ; the block move (count in CX)
-         and     bx, bx                  ; one extra byte?
-         jz      exit
-         movsb                           ; move one last byte
-         jmp     exit
-
-         ; ----- FROM is below TO: copy with descending addresses -----
- top_to_bottom:
-         mov     si, TO_SEG_ADDRESS
-         mov     es, [si]                ; to_seg to ES
-         mov     si, TO_OFFSET_ADDRESS
-         mov     di, [si]                ; to_offset to DI
-         mov     si, BYTE_COUNT_ADDRESS
-         mov     cx, [si]                ; byte count to CX
-         mov     si, FROM_SEG_ADDRESS
-         mov     ax, [si]                ; temporary storage for new DS
-         mov     si, FROM_OFFSET_ADDRESS
-         mov     si, [si]                ; from_offset to SI
-         mov     ds, ax                  ; now move from_seg to DS
-         add     si, cx                  ; one past the last source byte
-         sub     si, 2                   ; back up to the top word
-         add     di, cx                  ; one past the last target byte
-         sub     di, 2                   ; back up to the top word
-         sub     bx, bx                  ; clear BX
-         shr     cx, 1                   ; word count; odd-byte flag in CF
-         rcl     bx, 1                   ; BX = 1 if one byte is left over
-         std                             ; set DF (go down)
-         rep     movsw                   ; the block move (count in CX)
-         and     bx, bx                  ; one extra byte?
-         jz      exit
-         ; With an odd count the words moved were bytes 1..count-1, and
-         ; SI/DI now point one byte below the block. Step up to byte 0.
-         inc     si
-         inc     di
-         movsb                           ; move one last byte
-
- exit:
-         POPREGS ax, bx, cx, dx, si, di, es, ds
-         popf                            ; restore caller's flags (DF included)
-         mov     sp, bp
-         pop     bp
-         ret     (10)                    ; callee removes the 5 word arguments
-
- BlockMove endp
- ; - - - - - - - - - -
- _TEXT ENDS
- END
-
- ++++++++++++++++++++++ << END OF PROGRAM >> +++++++++++++++++++++++
-
-
-
-
- MULTIPLICATION AND DIVISION
-
-
- The following are routines for multiple word multiplication and division. They
- are the core routines. There must be an intermediate routine which prepares
- the information correctly for the core routine and then calls the core
- routine. Among other things, these intermediate routines must:
-
- 1) deal with signed numbers. They must convert any negative numbers into
- positive numbers and keep track of the signs. Then they must alter the
- signs of the results if necessary.
-
- 2) make copies of numbers for the core routine when the core routine will
- destroy or alter the number during the calculation.
-
- 3) make decisions about valid results for the multiplication routines. If
- we multiply two numbers of length N words, then the result can be N + N
- words long. What do you want to do if the result is over N words long? It
- is your decision.
-
- 4) transfer the results back to the programs if necessary.
-
- These are the things we did in chapter 16, and they are necessary here as
- well. In all the following routines you need to pay attention to the lengths.
- Some lengths are in BYTES and some lengths are in WORDS. Make sure you know
- which is which.
-
-
-
-
- BLOCK MULTIPLICATION
-
- The first multiplication program uses block multiplication. This is simply the
- multiple word multiplication that you did in chapters 13 and 16. This time,
- instead of multiplying n X 1 words, we will be multiplying n X n words.
- The most important thing that this routine does is minimize its work. If n =
- 100 words, then it is possible for the routine to do 10,000 multiplications.
- This takes a lot of time. If we have two 100 word numbers but the first one is
- 127,911 and the other one is 4,926,948,187,062 the first number has
- significant information in two words and the second number has significant
- information in three words. We only need to multiply 3 X 2 = 6 words instead
- of 10,000 words. As you can see, this will cut the time by a factor of over
- 1000. This routine requires that the result be distinct from either the
- multiplicand or multiplier and be n + n (2n) words long.
-
- First we clear the area for the result. The next section finds the highest
- non-zero word of both the multiplicand and multiplier. If either is 0 the
- result is 0, so we exit (the result is cleared and is 0). After that comes the
- multiplication proper. We multiply the complete multiplicand by one multiplier
- word, then cycle to the next multiplier word and so on. We add each DX:AX pair
- to the temporary result and propagate any carry that results from the
- addition. The result cannot be larger than N + N words, so we will never
- propagate past the result area. This is as fast as you can multiply numbers on
- the 8086.
-
-
- +++++++++++++++++++++ << START OF PROGRAM >> +++++++++++++++++++++++
-
- ; block multiplication using standard 8086 multiplication
- ; block_multiply ( length , multiplicand, multiplier, temp_result )
-
- ; length is the number of WORDS
- ; length is a number, but the others are addresses. The temp_result
- ; space must be (2 X length), and must be distinct from the other
- ; variables since it will be overwritten by the routine. This is
- ; a far routine for C, and after setting up BP, we have:
- ;
- ; TEMP_RESULT_ADDRESS EQU [bp + 12]
- ; MULTIPLIER_ADDRESS EQU [bp + 10]
- ; MULTIPLICAND_ADDRESS EQU [bp + 8]
- ; DATA_LENGTH EQU [bp + 6]
-
-
- INCLUDE \pushregs.mac
- ; - - - - - - - - - - - - - - - - - - - -
- ; Scratch variables used by block_multiply.
- DATASTUFF SEGMENT PUBLIC 'DATA'
- multiplicand_top_address dw ? ; address of the highest non-zero multiplicand word
- multiplier_top_address dw ? ; address of the highest non-zero multiplier word
- temp_bottom_address dw ? ; result address where the current multiplier word's products start
- current_multiplier_address dw ? ; address of the multiplier word being processed
- DATASTUFF ENDS
- ; - - - - - - - - - - - - - - - - - - - -
- CODESTUFF SEGMENT PUBLIC 'CODE'
-
- PUBLIC block_multiply
- ASSUME CS:CODESTUFF, DS:DATASTUFF
-
- ; Stack-frame arguments, valid after "push bp / mov bp, sp" in a far
- ; procedure. DATA_LENGTH is a word count; the others are addresses.
- TEMP_RESULT_ADDRESS  EQU [bp + 12]
- MULTIPLIER_ADDRESS   EQU [bp + 10]
- MULTIPLICAND_ADDRESS EQU [bp + 8]
- DATA_LENGTH          EQU [bp + 6]
-
- ; - - - - - - - - - -
- ; block_multiply ( length, multiplicand, multiplier, temp_result )
- ;
- ; Unsigned multi-word multiplication using the 8086 MUL instruction.
- ; temp_result (2 X length words) is cleared and then receives the
- ; product. Only the words up to the highest non-zero word of each
- ; operand are multiplied, and multiplier words that are zero are
- ; skipped. If length is 0, or either operand is 0, the routine returns
- ; with the (cleared) result untouched.
- ;
- ; The flags and AX, BX, CX, DX, SI, DI, ES are restored on exit.
- ; Returns with a plain RET: the caller removes the arguments.
- ; - - - - - - - - - -
- block_multiply proc far
-
-         push    bp
-         mov     bp, sp
-         pushf                           ; save DF value
-         PUSHREGS ax, bx, cx, dx, si, di, es
-         push    ds                      ; es = ds
-         pop     es
-
-         ; A zero length must leave every buffer alone. Without this
-         ; check REPE SCASW below would run zero times and the JNE after
-         ; it would test stale flags.
-         mov     cx, DATA_LENGTH
-         or      cx, cx
-         jnz     length_ok
-         jmp     exit_mult
-
- length_ok:
-         ; clear temp_result
-         mov     di, TEMP_RESULT_ADDRESS
-         shl     cx, 1                   ; 2 X LENGTH is buffer length
-         mov     ax, 0                   ; zero for clearing
-         cld                             ; upwards
-         rep     stosw                   ; store ax
-
-         ; find the highest multiplicand word which is non-zero
-         mov     di, MULTIPLICAND_ADDRESS
-         mov     dx, DATA_LENGTH
-         mov     cx, dx                  ; cx = length in words
-         mov     bx, dx
-         dec     bx                      ; first word is at offset 0
-         shl     bx, 1                   ; bx = byte offset of top word
-         add     di, bx                  ; di = address of top word
-         std                             ; downwards
-                                         ; ax is still 0
-         repe    scasw                   ; continue as long as es:[di] is 0
-         jne     first_top_found         ; found non-zero word
-         jmp     exit_mult               ; multiplicand is 0 so result is 0
-
- first_top_found:
-         add     di, 2                   ; SCASW stepped 2 past the word found
-         mov     multiplicand_top_address, di ; address of top non-zero word
-
-         ; no registers have been modified except di and cx
-         ; use the same ax, bx and dx values as before for multiplier.
-
-         ; find the highest non-zero multiplier word
-         mov     di, MULTIPLIER_ADDRESS
-         add     di, bx                  ; di = address of top word
-         mov     cx, dx                  ; cx = length in words
-                                         ; ax is still 0
-         repe    scasw                   ; continue as long as es:[di] is 0
-         jne     second_top_found        ; found non-zero word
-         jmp     exit_mult               ; multiplier is 0 so result is 0
-
- second_top_found:
-         add     di, 2                   ; SCASW stepped 2 past the word found
-         mov     multiplier_top_address, di ; address of top non-zero word
-
-         ; the multiplication ********************
-         mov     ax, TEMP_RESULT_ADDRESS
-         mov     temp_bottom_address, ax ; start at bottom
-         mov     si, MULTIPLIER_ADDRESS
-         mov     current_multiplier_address, si ; save address
-
- outer_multiplication_loop:
-         ; set up the registers
-         mov     cx, [si]                ; move current multiplier to cx
-         jcxz    next_multiplier_word    ; a zero word adds nothing: skip it
-         mov     di, MULTIPLICAND_ADDRESS
-         mov     bx, temp_bottom_address
-
- inner_multiplication_loop:
-         mov     ax, cx                  ; multiplier word to ax
-         mul     WORD PTR [di]           ; multiplicand - result in DX:AX
-         add     [bx], ax                ; low word of multiplication
-         adc     [bx+2], dx              ; high word of multiplication
-         jnc     no_more_carry           ; extra work if CF=1
-         mov     si, 4                   ; SI is free here: it is reloaded
-                                         ; from current_multiplier_address
-         ; keep propagating the carry till CF = 0
- propagate_carry:
-         add     WORD PTR [bx+si], 1
-         jnc     no_more_carry
-         add     si, 2                   ; next word
-         jmp     propagate_carry
-
- no_more_carry:
-         add     bx, 2                   ; next word of temp result
-         add     di, 2                   ; next word of multiplicand
-         cmp     di, multiplicand_top_address ; finished?
-         ja      next_multiplier_word
-         jmp     inner_multiplication_loop
-
- next_multiplier_word:
-         mov     si, current_multiplier_address
-         add     si, 2
-         cmp     si, multiplier_top_address
-         ja      exit_mult               ; end of multiplication
-
-         mov     current_multiplier_address, si ; save address
-         add     temp_bottom_address, 2  ; increment for next start
-         jmp     outer_multiplication_loop
-
-         ; end of the multiplication *************
-
- exit_mult:
-         POPREGS ax, bx, cx, dx, si, di, es
-         popf                            ; restore DF value
-         mov     sp, bp
-         pop     bp
-         ret                             ; a C return, so don't pop arguments.
-
- block_multiply endp
- ; - - - - - - - - - -
- CODESTUFF ENDS
- END
-
- ++++++++++++++++++++++ << END OF PROGRAM >> +++++++++++++++++++++++
-
-
-
- If you understand all of this you can go on. The next one is even more
- difficult.
-
-
-
- BINARY MULTIPLICATION
-
- This is how the 8086 does multiplication internally. It is a series of shifts
- and additions. We can do the same thing with base 10 numbers.
-
-
- 24763 X 275
-
- 24763 ---
- 24763 |
- 24763 5
- 24763 |
- 24763 ---
- 247630 ___
- 247630 |
- 247630 |
- 247630 7
- 247630 |
- 247630 |
- 247630 ---
- 2476300 ---
- 2476300 2
-
- 6,809,825
-
- In the base 10 system this is tedious. In the base 2 system this works well.
- You either do NO addition or you do 1 addition. We start at the bottom and add
- (either once or not at all), then shift the whole number left one bit. We
- repeat this cycle till we are finished with the whole multiplier. Once again,
- the pivotal operation is finding the highest non-zero word before starting.
- This is about 5 times slower than the first method. The only reason that it is
- here is to prepare you for the binary division routine.
-
- We need to reserve an extra word above the multiplicand. If the multiplicand
- is 6 words long, we need 7 words for the multiplicand. The 6th word will shift
- into that 7th word 1 bit at a time. At the end of our 16 bit cycle, all words
- will have shifted up one word.
-
- As the multiplication progresses, the bottom words of the multiplicand will be
- 0 so we don't bother to add these 0 words.
-
- We load the multiplier into DX one word at a time. We then check this word one
- bit at a time. If the bit is 1 we add, if the bit is 0 we do nothing. We shift
- the multiplicand left 1 bit each time, whether we add or not.
-
-
-
- +++++++++++++++++++++ << START OF PROGRAM >> ++++++++++++++++++++++
-
- ; binary multiplication using shifts and addition
- ; binary_multiply ( length , multiplicand, multiplier, temp_result )
-
- ; length is the number of WORDS
- ; length is a number, but the others are addresses. The temp_result
- ; space and the multiplicand space must be ((2 X length)+1) WORDS,
- ; and must be distinct from the calling variables since they will be
- ; overwritten by the routine. This is a far routine for C, and after
- ; setting up BP, we have:
-
- ; TEMP_RESULT_ADDRESS EQU [bp + 12]
- ; MULTIPLIER_ADDRESS EQU [bp + 10]
- ; MULTIPLICAND_ADDRESS EQU [bp + 8]
- ; DATA_LENGTH EQU [bp + 6]
-
-
- include \pushregs.mac
- ; - - - - - - - - - - - - - - - - - - - -
- ; Scratch variables used by binary_multiply.
- DATASTUFF SEGMENT PUBLIC 'DATA'
- multiplicand_length dw ? ; WORDS: significant multiplicand words + 1 spare word for the bit shift
- multiplier_length dw ? ; BYTES: length of the multiplier up to its highest non-zero word
- lowest_non_zero_word dw ? ; byte offset of the current multiplier word (advances by 2)
- DATASTUFF ENDS
- ; - - - - - - - - - - - - - - - - - - - -
- CODESTUFF SEGMENT PUBLIC 'CODE'
-
- PUBLIC binary_multiply
- ASSUME cs:CODESTUFF, ds:DATASTUFF
-
- ; Stack-frame arguments, valid after "push bp / mov bp, sp" in a far
- ; procedure. DATA_LENGTH is a word count; the others are addresses.
- TEMP_RESULT_ADDRESS EQU [bp + 12]
- MULTIPLIER_ADDRESS EQU [bp + 10]
- MULTIPLICAND_ADDRESS EQU [bp + 8]
- DATA_LENGTH EQU [bp + 6]
-
- ; - - - - - - - - - -
- ; binary_multiply ( length, multiplicand, multiplier, temp_result )
- ;
- ; Unsigned multi-word multiplication by shift and add. For every bit of
- ; every significant multiplier word (lowest bit first) the multiplicand
- ; is added into temp_result if the bit is 1, and the multiplicand is then
- ; shifted left one bit in place. The multiplicand buffer is therefore
- ; destroyed. The flags and AX, BX, CX, DX, SI, DI, ES are restored on
- ; exit, and the routine returns with a plain RET.
- ;
- ; NOTE(review): the shift window moves up into multiplicand words above
- ; DATA_LENGTH, and this routine never clears them - presumably the caller
- ; must supply them as zero; confirm against the calling routine.
- ; - - - - - - - - - -
- binary_multiply proc far
-
- push bp
- mov bp, sp
- pushf ; save DF value
- PUSHREGS ax, bx, cx, dx, si, di, es
- push ds ; es = ds
- pop es
-
- ; clear temp buffer
- ; (2 X length words are cleared here)
- mov di, TEMP_RESULT_ADDRESS
- mov cx, DATA_LENGTH
- shl cx, 1 ; 2 X LENGTH is buffer length
- mov ax, 0
- cld ; upwards
- rep stosw ; store ax
-
- ; find the highest word which is non-zero
- ; (scan the multiplicand downwards from its top word, comparing with AX = 0)
- mov di, MULTIPLICAND_ADDRESS
- mov dx, DATA_LENGTH
- mov cx, dx ; cx = length in words
- mov bx, dx
- dec bx
- shl bx, 1 ; bx = byte offset of top word
- add di, bx ; di = address of top word
- std ; downwards
- ; ax is still 0
- repe scasw ; continue as long as es:[di] is 0
- jne first_top_found ; found non-zero word
- jmp exit_mult ; multiplicand is 0 so result is 0
-
- first_top_found:
- ; we went 2 too far + 2 for length + 2 extra for bit shift
- ; (DI ends 2 below the word found; after the subtraction it is a byte count)
- add di, 6
- sub di, MULTIPLICAND_ADDRESS
- shr di, 1 ; divide by 2
- mov multiplicand_length, di ; length in WORDS
-
- ; no registers have been modified except di and cx
- ; use the same ax, bx and dx values as before for multiplier.
-
- ; find the highest non-zero word
- mov di, MULTIPLIER_ADDRESS
- add di, bx ; di = address of top word
- mov cx, dx ; cx = length in words
- ; ax is still 0
- repe scasw ; continue as long as es:[di] is 0
- jne second_top_found ; found non-zero word
- jmp exit_mult ; multiplier is 0 so result is 0
-
- second_top_found:
- ; we went 2 too far + 2 for length
- add di, 4
- sub di, MULTIPLIER_ADDRESS
- mov multiplier_length, di ; length in BYTES
-
- ; the multiplication ********************
- mov lowest_non_zero_word, 0
-
- ; outer loop: one pass per significant multiplier word
- multiplicand_loop:
- mov ax, lowest_non_zero_word ; byte offset of current word
- cmp ax, multiplier_length ; length in bytes
- jb multiply_a_word
- jmp exit_mult ; we are through
- ; ax still has the byte offset
- multiply_a_word:
- mov si, MULTIPLIER_ADDRESS
- add si, ax ; calculate where multiplier is
- mov dx, [si] ; this is current multiplier word
- mov cx, 16 ; 16 adds and shifts
- add_and_shift_loop:
- push cx ; bit counter: CX is reused by the inner loops
- shr dx, 1 ; add if low bit is 1
- jnc skip_the_addition
- mov ax, lowest_non_zero_word ; offset count
- mov si, MULTIPLICAND_ADDRESS
- add si, ax
- mov bx, TEMP_RESULT_ADDRESS
- add bx, ax
- mov cx, multiplicand_length ; length in words
- clc
- ; multi-word add; INC and LOOP leave CF alone, so the carry
- ; passes from one ADC to the next
- inner_add_loop:
- mov ax, [si]
- adc [bx], ax
- inc si ; doesn't affect the carry flag
- inc si
- inc bx
- inc bx
- loop inner_add_loop
- adc WORD PTR [bx], 0 ; one last carry is possible
-
- skip_the_addition:
- ; shift one bit to the left
- ; (RCL passes the bit shifted out of each word into the next word up)
- mov si, MULTIPLICAND_ADDRESS
- add si, lowest_non_zero_word
- mov cx, multiplicand_length ; length in words
- clc
- shift_1_loop:
- rcl WORD PTR [si], 1
- inc si ; doesn't affect carry flag
- inc si
- loop shift_1_loop
-
- pop cx ; restore bit counter
- loop add_and_shift_loop
-
- add lowest_non_zero_word, 2 ; move up one word
- jmp multiplicand_loop
-
-
- ; end of the multiplication *************
-
- exit_mult:
- POPREGS ax, bx, cx, dx, si, di, es
- popf ; restore DF value
- mov sp, bp
- pop bp
- ret ; a C return, so don't pop arguments.
-
- binary_multiply endp
- ; - - - - - - - - - -
- CODESTUFF ENDS
- END
-
- ++++++++++++++++++++++ << END OF PROGRAM >> +++++++++++++++++++++++
-
-
-
-
- BINARY DIVISION
-
-
- This is by far the hardest to understand. The binary division routine is the
- opposite of the multiplication routine. We move the dividend to the remainder
- area since it will be modified during the routine. We shift the divisor one
- word past the top of the dividend (to make sure that the divisor starts out
- larger than the dividend) and then start the shift-subtract cycle. We shift
- right 1 bit and then take a look at the two numbers. If the divisor is larger
- than the dividend we do nothing and put a 0 bit in the quotient. If the
- divisor is smaller, we put a 1 bit in the quotient and subtract the divisor
- from the dividend. At the end, what is left of the dividend is our remainder.
-
- As usual, we use only as many words as necessary, both for the numbers
- and the individual subtractions.
-
- This is about 5 times slower than the block multiplication. It is possible to
- approach the speed of the block multiplication routine by using a block division
- routine which guesses and then modifies its guess, but it would be almost
- impossible to understand what the code does, so I won't show it to you.
-
-
- +++++++++++++++++++++ << START OF PROGRAM >> ++++++++++++++++++++++
-
- ; binary division using shifts and subtraction
- ; binary_divide ( length , dividend, divisor, quotient, remainder)
-
- ; length is the number of WORDS
- ; length is a number, but the others are addresses. The divisor and
- ; remainder space will be overwritten one word past the highest non-
- ; zero word by the subroutine. The remainder space is cleared one word past
- ; its length. This is a far routine for C, and after setting up BP, we have:
-
- ; Stack-frame arguments, valid after "push bp / mov bp, sp" in the far
- ; procedure binary_divide. DATA_LENGTH is read as a word count; the
- ; other slots are read as addresses. The dividend is copied into the
- ; remainder area, so OUR_DIVIDEND_ADDRESS names the same slot.
- OUR_DIVIDEND_ADDRESS EQU [bp + 14] ; same as remainder address
- REMAINDER_ADDRESS EQU [bp + 14]
- QUOTIENT_ADDRESS EQU [bp + 12]
- DIVISOR_ADDRESS EQU [bp + 10]
- DIVIDEND_ADDRESS EQU [bp + 8]
- DATA_LENGTH EQU [bp + 6]
-
- include \pushregs.mac
- ; - - - - - - - - - - - - - - - - - - - -
- ; Scratch variables used by binary_divide.
- DATASTUFF SEGMENT PUBLIC 'DATA'
-
- dividend_length dw ? ; BYTES, up to the highest non-zero dividend word
- divisor_length dw ? ; first BYTES; later WORDS + 1 (shift_divisor rewrites it)
- ; - - - -
- ; the four addresses below bound the window being compared and
- ; subtracted; each moves down 2 bytes after every 16 bit shifts
- top_divisor_address dw ?
- bottom_divisor_address dw ?
- top_dividend_address dw ?
- bottom_dividend_address dw ?
- current_quotient_address dw ? ; quotient word receiving the current bits
- ; - - - -
- shift_count dw ? ; BYTES the divisor still has to be shifted back down
- quotient_bit dw ? ; one-bit mask, rotated right before each step
-
-
-
- DATASTUFF ENDS
- ; - - - - - - - - - - - - - - - - - - - -
- CODESTUFF SEGMENT PUBLIC 'CODE'
-
- PUBLIC binary_divide
- ASSUME cs:CODESTUFF, ds:DATASTUFF
-
- ; - - - - - - - - - -
- ; binary_divide ( length, dividend, divisor, quotient, remainder )
- ;
- ; Unsigned multi-word division by shift and subtract. The dividend is
- ; copied to the remainder area and worked on there. The divisor is moved
- ; up in its own buffer until its top lies one word above the top of the
- ; dividend, then shifted right one bit at a time. After each shift, if
- ; the dividend window is not below the divisor window, a 1 bit is OR-ed
- ; into the quotient and the divisor is subtracted. What remains in the
- ; remainder area at the end is the remainder.
- ;
- ; A zero divisor executes INT 0. The flags and AX, BX, CX, DX, SI, DI,
- ; ES are restored on exit, and the routine returns with a plain RET.
- binary_divide proc far
-
- push bp
- mov bp, sp
- pushf ; save DF value
- PUSHREGS ax, bx, cx, dx, si, di, es
- push ds ; es = ds
- pop es
-
- ; clear quotient
- mov ax, 0 ; zero for clearing
- mov dx, DATA_LENGTH ; store for later
- mov cx, dx
- mov di, QUOTIENT_ADDRESS
- cld ; upwards
- rep stosw
- ; move dividend to remainder area
- mov si, DIVIDEND_ADDRESS
- mov di, REMAINDER_ADDRESS ; our new dividend area
- mov cx, dx ; DATA_LENGTH
- rep movsw ; upwards
- mov [di], ax ; extra 0 above dividend space
-
- ; find the highest divisor word which is non-zero
- ; dx still has DATA_LENGTH
- mov bx, dx ; dx = DATA_LENGTH
- dec bx
- shl bx, 1 ; bx = top word (in # of bytes)
- mov di, DIVISOR_ADDRESS
- mov bottom_divisor_address, di ; save for later
- add di, bx ; di = address of top word
- mov cx, dx ; cx = length in words
- std ; downwards
- ; ax is still 0
- repe scasw ; look for nonzero
- jne first_top_found ; left loop because unequal?
- ; NOTE(review): nothing follows the INT, so if the INT 0 handler
- ; returns, execution falls through into first_top_found - confirm
- ; that the handler never returns here.
- int 0 ; divisor is 0 so divide error
-
- first_top_found:
- add di, 2 ; we went 2 too far
- mov top_divisor_address, di ; store for later
- sub di, DIVISOR_ADDRESS
- add di, 2 ; actual length
- mov divisor_length, di ; length in BYTES
-
- ; no registers have been modified except di and cx
- ; use the same ax, bx and dx values as before for dividend.
- ; find the highest non-zero dividend word
- ; ax is still 0 (from above)
- mov di, OUR_DIVIDEND_ADDRESS
- mov bottom_dividend_address, di ; save for later
- add di, bx ; di = address of top word
- mov cx, dx ; dx = length in words
- repe scasw ; downwards
- jne second_top_found ; equal on exit?
- jmp exit_div ; dividend = 0 so quotient is 0, remainder is 0
-
- second_top_found:
- ; add 2 for overshoot & 2 for calculating length
- ; top dividend address is just past top of dividend
- add di, 4
- mov top_dividend_address, di ; this is correct
- sub di, OUR_DIVIDEND_ADDRESS
- mov dividend_length, di ; length in BYTES
-
- ; if dividend length < divisor length, we are done
- ; (quotient stays 0 and the remainder area already holds the dividend)
- cmp di, divisor_length
- jae shift_divisor
- jmp exit_div
-
- shift_divisor:
- ; figure out shift count.
- ; change divisor length from bytes to words
- ; di is still dividend length
- mov ax, di ; dividend_length
- mov dx, divisor_length
- sub ax, dx ; amount of shift
- add bottom_divisor_address, ax ; current bottom
- add bottom_dividend_address, ax ; current bottom
- add ax, 2 ; 2 extra bytes for shift
- mov shift_count, ax ; save shift count
- shr dx, 1 ; divisor length - BYTES to WORDS
- mov cx, dx ; cx is amount of data to shift
- inc dx ; one word extra for shift
- mov divisor_length, dx ; new divisor_length (WORDS)
- ; prepare pointers for the shift
- ; (DF is still set, so the copy runs from the top word downwards,
- ; which is safe for this upward, overlapping move)
- mov si, top_divisor_address
- mov di, si ; destination pointer
- add di, ax ; add the shift
- mov top_divisor_address, di ; new top of divisor
- rep movsw ; downwards
- ; zero bottom of divisor
- ; (DI continues downwards from just below the moved divisor)
- mov ax, 0
- mov cx, shift_count
- shr cx, 1 ; shift count in words
- rep stosw
-
- ; set up quotient info
- mov ax, QUOTIENT_ADDRESS
- add ax, shift_count
- sub ax, 2 ; address of top word
- mov current_quotient_address, ax
- mov quotient_bit, 0001h ; bit to rotate
- ; (the first ROR turns 0001h into 8000h)
-
- ; ***** the division *****************
-
- ; outer loop: one pass (16 shifts) per word of shift_count
- division_loop:
- cmp shift_count, 0 ; if 0, we are done
- ja do_shift_16
- jmp exit_div
-
- do_shift_16:
- ; ++++++++++ SHIFT AND SUBTRACT LOOP ++++++
- mov cx, 16
- shift_16_loop:
- push cx ; save counter
-
- ; +++++++++ SHIFT ++++++++++
- ; shift divisor one bit to the right
- ror quotient_bit, 1
- mov si, top_divisor_address
- mov cx, divisor_length ; length in words
- clc ; clear CF
- ; RCR passes the bit shifted out of each word into the next word down
- shift_1_loop:
- rcr WORD PTR [si], 1
- dec si ; doesn't affect carry flag
- dec si
- loop shift_1_loop
-
- ; +++++++++ CHECK FOR SKIP SUBTRACTION +++++++
- ; skip subtraction if dividend < divisor
- ; (compared from the top word down; the first differing word decides)
- mov di, top_divisor_address
- mov si, top_dividend_address
- mov cx, divisor_length
- std ; decrement pointers
- repe cmpsw ; cmp dividend, divisor
- jb skip_subtraction ; dividend < divisor
-
- ; +++++++++++++++ SUBTRACTION ++++++++++++++++
- ; OR 1 into quotient
- mov si, current_quotient_address
- mov dx, quotient_bit
- or [si], dx
-
- mov si, bottom_divisor_address
- mov di, bottom_dividend_address
- mov cx, divisor_length ; words
- clc ; clear CF
- ; multi-word subtract; INC and LOOP leave CF alone, so the borrow
- ; passes from one SBB to the next
- subtraction_loop:
- mov dx, [si]
- sbb [di], dx
- inc si
- inc si
- inc di
- inc di
- loop subtraction_loop
-
- ; dividend >= divisor, so we have no final borrow
-
- ; +++++++++++ AFTER SUBTRACTION ++++++++++++++++
- skip_subtraction:
- pop cx ; restore the 16-step counter
- loop shift_16_loop
-
- ; reset the pointers and counters for the outer loop
- ; (everything moves down one word)
- sub shift_count, 2
- sub top_divisor_address, 2
- sub top_dividend_address, 2
- sub bottom_divisor_address, 2
- sub bottom_dividend_address, 2
- sub current_quotient_address, 2
- jmp division_loop
-
- ; end of the division *************
-
- exit_div:
- POPREGS ax, bx, cx, dx, si, di, es
- popf ; restore DF value
- mov sp, bp
- pop bp
- ret ; a C return, so don't pop arguments.
-
- binary_divide endp
- ; - - - - - - - - - -
- CODESTUFF ENDS
- END
-
- ++++++++++++++++++++++ << END OF PROGRAM >> +++++++++++++++++++++++
-
-